library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
library(ggplot2)
library(urca)
library(readxl)
library(corrplot)
## corrplot 0.92 loaded
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(stats)
# Load the monthly Korean online-shopping transaction data.
# Columns: X = exported row index, year, month, trans = transaction amount (KRW).
df_raw <- read.csv('df_ts.csv')
head(df_raw)
## X year month trans
## 1 0 2017 1 12553.95
## 2 1 2017 2 12563.92
## 3 2 2017 3 15303.74
## 4 3 2017 4 14488.56
## 5 4 2017 5 14598.69
## 6 5 2017 6 13781.69
tail(df_raw)
## X year month trans
## 70 69 2022 10 27746.74
## 71 70 2022 11 29250.67
## 72 71 2022 12 29305.43
## 73 72 2023 1 21984.78
## 74 73 2023 2 22173.62
## 75 74 2023 3 27782.57
# Build a monthly ts object. NOTE: end = c(2022, 12) deliberately truncates the
# series, dropping the Jan-Mar 2023 rows; those remain in df_raw and are
# compared against the 2023 forecast at the end of the script.
df <- ts(df_raw$trans, start = c(2017, 1), end = c(2022, 12), frequency = 12)
df
## Jan Feb Mar Apr May Jun Jul Aug
## 2017 12553.95 12563.92 15303.74 14488.56 14598.69 13781.69 13498.71 12182.00
## 2018 14776.45 13061.33 17365.86 16294.64 16626.70 15480.40 15522.47 13294.03
## 2019 16605.26 15133.71 18864.05 18404.15 19546.40 17709.79 17866.30 15096.64
## 2020 16395.52 15602.54 17589.85 17325.76 19431.31 19638.08 17607.72 15162.73
## 2021 17285.17 16528.10 22782.97 21848.27 21984.84 21899.50 20429.43 17667.59
## 2022 21528.63 19774.57 25001.58 25850.27 26215.87 24578.53 22809.14 20821.50
## Sep Oct Nov Dec
## 2017 14354.86 14935.26 19785.80 17377.72
## 2018 14896.52 18825.27 20427.76 19919.02
## 2019 18290.33 20677.79 24100.81 21817.97
## 2020 17655.72 22703.97 24892.93 22473.03
## 2021 20868.34 26445.76 28679.43 28431.62
## 2022 23725.23 27746.74 29250.67 29305.43
# Hold out 2022 as the test year; train on 2017-2021 (60 observations).
df_train <- window(df, start = c(2017, 1), end = c(2021, 12))
df_test <- window(df, start = c(2022,1), end = c(2022, 12))
df_train
## Jan Feb Mar Apr May Jun Jul Aug
## 2017 12553.95 12563.92 15303.74 14488.56 14598.69 13781.69 13498.71 12182.00
## 2018 14776.45 13061.33 17365.86 16294.64 16626.70 15480.40 15522.47 13294.03
## 2019 16605.26 15133.71 18864.05 18404.15 19546.40 17709.79 17866.30 15096.64
## 2020 16395.52 15602.54 17589.85 17325.76 19431.31 19638.08 17607.72 15162.73
## 2021 17285.17 16528.10 22782.97 21848.27 21984.84 21899.50 20429.43 17667.59
## Sep Oct Nov Dec
## 2017 14354.86 14935.26 19785.80 17377.72
## 2018 14896.52 18825.27 20427.76 19919.02
## 2019 18290.33 20677.79 24100.81 21817.97
## 2020 17655.72 22703.97 24892.93 22473.03
## 2021 20868.34 26445.76 28679.43 28431.62
df_test
## Jan Feb Mar Apr May Jun Jul Aug
## 2022 21528.63 19774.57 25001.58 25850.27 26215.87 24578.53 22809.14 20821.50
## Sep Oct Nov Dec
## 2022 23725.23 27746.74 29250.67 29305.43
# Exploratory plots: level series and classical decompositions (trend +
# seasonality are clearly present in both additive and multiplicative form).
autoplot(df_train) + ylab('Transaction Amount (KRW)') + ggtitle('Korean Online Shopping Transaction Amount by Year')
autoplot(decompose(df_train, type = 'additive'))
autoplot(decompose(df_train, type = 'multiplicative'))
# Estimate the Box-Cox lambda for variance stabilisation.
# FIX: use <- for assignment (was `lambda = ...`).
lambda <- BoxCox.lambda(df_train)
lambda # -0.5117632
## [1] -0.5117632
# The Box-Cox transformation was applied to the data, but the observed
# improvement is not substantial.
# FIX: the sentence above was previously a bare prose line beginning with
# "-The ...", which is invalid R syntax (unary minus on an undefined object)
# and would abort the script; it is now a comment.
cbind('Original Plot' = df_train, 'BoxCox Transformed Plot' = BoxCox(df_train, lambda = lambda)) %>% autoplot(facet = TRUE) + ggtitle('Comparison of BoxCox Transformation')
# Stationarity tests on the raw training series.
# KPSS: null = level-stationary. ADF: null = unit root (non-stationary).
kpss.test(df_train) # With a p-value of 0.01 (less than the significance level of 0.05), we reject the null hypothesis of stationarity and accept the alternative hypothesis, which suggests non-stationarity.
## Warning in kpss.test(df_train): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: df_train
## KPSS Level = 1.2782, Truncation lag parameter = 3, p-value = 0.01
# NOTE: the two tests disagree here (KPSS rejects stationarity, ADF rejects a
# unit root); the analysis below proceeds as if the series is non-stationary
# and applies differencing.
adf.test(df_train) # p-value = 0.03898 < 0.05, reject the null, and accept the alternative hypothesis, which suggests stationarity.
##
## Augmented Dickey-Fuller Test
##
## data: df_train
## Dickey-Fuller = -3.6217, Lag order = 3, p-value = 0.03898
## alternative hypothesis: stationary
# Visual and frequency-domain diagnostics for seasonality.
tsdisplay(df_train)
acf(df_train) # The ACF plot reveals clear evidence of strong autocorrelation in the data, exhibiting slow decay and subdued seasonal patterns.
ggseasonplot(df_train, polar = TRUE)
ggseasonplot(df_train)
ggsubseriesplot(df_train)
spectrum(df_train)
# FIX: the original called periodogram(df_train) twice -- once bare, once
# assigned -- computing and drawing the identical plot twice. The assigned
# call below still draws it once as a side effect.
temp <- periodogram(df_train)
# Period implied by the dominant spectral frequency.
max_freq <- temp$freq[which.max(temp$spec)]
seasonality <- 1 / max_freq
seasonality # 6
## [1] 6
1 / temp$freq[5] # The freq[5] is the 4th largest value, which indicates a seasonality of 12.
## [1] 12
# NOTE(review): the dominant period comes out as 6, but a 12-month period also
# appears (freq[5]) and matches the monthly frequency of the data -- the
# seasonal ARIMA models below use [12].
# Box-Cox alone does not achieve stationarity; first differencing does.
tsdisplay(BoxCox(df_train, lambda = lambda)) # Even from the visual inspection of the ACF plot, stationarity was not achieved.
kpss.test(BoxCox(df_train, lambda = lambda)) # p-value = 0.01 (< 0.05, non-stationary)
## Warning in kpss.test(BoxCox(df_train, lambda = lambda)): p-value smaller than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: BoxCox(df_train, lambda = lambda)
## KPSS Level = 1.3322, Truncation lag parameter = 3, p-value = 0.01
adf.test(BoxCox(df_train, lambda = lambda)) # p-value = 0.01 (< 0.05, stationary)
## Warning in adf.test(BoxCox(df_train, lambda = lambda)): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: BoxCox(df_train, lambda = lambda)
## Dickey-Fuller = -4.3142, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# First difference of the raw series: KPSS and ADF now agree on stationarity.
tsdisplay(diff(df_train))
kpss.test(diff(df_train)) # p-value = 0.1 (> 0.05, stationary)
## Warning in kpss.test(diff(df_train)): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(df_train)
## KPSS Level = 0.069442, Truncation lag parameter = 3, p-value = 0.1
adf.test(diff(df_train)) # p-value = 0.01 (< 0.05, stationary)
## Warning in adf.test(diff(df_train)): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(df_train)
## Dickey-Fuller = -6.1817, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# Seasonal differencing at the candidate period of 6.
# BUG FIX: diff()'s argument is `lag`, not `lags`. The original code passed
# `lags = 6`, which fell into `...` and was SILENTLY IGNORED, so every call in
# this section actually computed a plain first difference -- confirmed by the
# transcript: KPSS Level 0.069442 and ADF -6.1817 below are identical to
# diff(df_train) in the previous section.
# NOTE(review): the `##` transcript below is the output of the buggy calls and
# must be regenerated after this fix; also consider lag = 12 for monthly
# data, matching the [12] seasonal ARIMA models -- confirm.
tsdisplay(diff(df_train, lag = 6))
kpss.test(diff(df_train, lag = 6)) # stale transcript below: p-value = 0.1
## Warning in kpss.test(diff(df_train, lags = 6)): p-value greater than printed p-
## value
##
## KPSS Test for Level Stationarity
##
## data: diff(df_train, lags = 6)
## KPSS Level = 0.069442, Truncation lag parameter = 3, p-value = 0.1
adf.test(diff(df_train, lag = 6)) # stale transcript below: p-value = 0.01
## Warning in adf.test(diff(df_train, lags = 6)): p-value smaller than printed p-
## value
##
## Augmented Dickey-Fuller Test
##
## data: diff(df_train, lags = 6)
## Dickey-Fuller = -6.1817, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# Seasonal difference followed by a first difference.
tsdisplay(diff(diff(df_train, lag = 6)))
kpss.test(diff(diff(df_train, lag = 6))) # stale transcript below: p-value = 0.1
## Warning in kpss.test(diff(diff(df_train, lags = 6))): p-value greater than
## printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff(diff(df_train, lags = 6))
## KPSS Level = 0.034073, Truncation lag parameter = 3, p-value = 0.1
adf.test(diff(diff(df_train, lag = 6))) # stale transcript below: p-value = 0.01
## Warning in adf.test(diff(diff(df_train, lags = 6))): p-value smaller than
## printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(diff(df_train, lags = 6))
## Dickey-Fuller = -6.853, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary
# Baseline model: seasonal naive (each month forecast = same month last year).
# FIX: use <- for assignment (was `m_snaive = ...`).
m_snaive <- snaive(df_train)
summary(m_snaive) # RMSE on training set: 2404.29
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = df_train)
##
## Residual sd: 2404.2896
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1863.044 2404.29 2011.82 9.385204 10.23531 1 0.4635676
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2022 17285.17 14203.95 20366.39 12572.849 21997.49
## Feb 2022 16528.10 13446.88 19609.32 11815.779 21240.42
## Mar 2022 22782.97 19701.75 25864.19 18070.649 27495.29
## Apr 2022 21848.27 18767.05 24929.49 17135.949 26560.59
## May 2022 21984.84 18903.62 25066.06 17272.519 26697.16
## Jun 2022 21899.50 18818.28 24980.72 17187.179 26611.82
## Jul 2022 20429.43 17348.21 23510.65 15717.109 25141.75
## Aug 2022 17667.59 14586.37 20748.81 12955.269 22379.91
## Sep 2022 20868.34 17787.12 23949.56 16156.019 25580.66
## Oct 2022 26445.76 23364.54 29526.98 21733.439 31158.08
## Nov 2022 28679.43 25598.21 31760.65 23967.109 33391.75
## Dec 2022 28431.62 25350.40 31512.84 23719.299 33143.94
## Jan 2023 17285.17 12927.67 21642.67 10620.942 23949.40
## Feb 2023 16528.10 12170.60 20885.60 9863.872 23192.33
## Mar 2023 22782.97 18425.47 27140.47 16118.742 29447.20
## Apr 2023 21848.27 17490.77 26205.77 15184.042 28512.50
## May 2023 21984.84 17627.34 26342.34 15320.612 28649.07
## Jun 2023 21899.50 17542.00 26257.00 15235.272 28563.73
## Jul 2023 20429.43 16071.93 24786.93 13765.202 27093.66
## Aug 2023 17667.59 13310.09 22025.09 11003.362 24331.82
## Sep 2023 20868.34 16510.84 25225.84 14204.112 27532.57
## Oct 2023 26445.76 22088.26 30803.26 19781.532 33109.99
## Nov 2023 28679.43 24321.93 33036.93 22015.202 35343.66
## Dec 2023 28431.62 24074.12 32789.12 21767.392 35095.85
checkresiduals(m_snaive)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 45.6, df = 12, p-value = 8.129e-06
##
## Model df: 0. Total lags used: 12
# Residuals fail the Ljung-Box test (p = 8.1e-06): significant autocorrelation
# remains, so this baseline underfits the series.
# Exponential smoothing state-space model, automatically selected.
# FIX: use <- for assignment (was `m_ets = ...`).
m_ets <- ets(df_train)
summary(m_ets) # ETS(M,Ad,M) # RMSE on training set: 847.3451
## ETS(M,Ad,M)
##
## Call:
## ets(y = df_train)
##
## Smoothing parameters:
## alpha = 0.4386
## beta = 0.0203
## gamma = 1e-04
## phi = 0.9748
##
## Initial states:
## l = 13879.1604
## b = 209.4993
## s = 1.1465 1.2557 1.1049 0.9344 0.8033 0.938
## 0.9835 1.0369 0.9977 1.0527 0.8398 0.9065
##
## sigma: 0.0532
##
## AIC AICc BIC
## 1083.117 1099.800 1120.815
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 121.4824 847.3451 625.0957 0.3772927 3.405979 0.3107116
## ACF1
## Training set -0.01756663
checkresiduals(m_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 9.7846, df = 12, p-value = 0.6349
##
## Model df: 0. Total lags used: 12
# Residuals pass the Ljung-Box test (p = 0.63): no remaining autocorrelation.
# Seasonal ARIMA with an automatically chosen Box-Cox transformation.
# FIX: use <- for assignment (was `m_arima = ...`).
m_arima <- auto.arima(df_train, lambda = 'auto', seasonal = TRUE, trace = TRUE)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : -588.9985
## ARIMA(1,0,0)(1,1,0)[12] with drift : Inf
## ARIMA(0,0,1)(0,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] : -541.3509
## ARIMA(0,0,0)(1,1,0)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,0)[12] with drift : -596.7387
## ARIMA(1,0,0)(0,1,1)[12] with drift : Inf
## ARIMA(1,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,0)[12] with drift : -598.9885
## ARIMA(2,0,0)(1,1,0)[12] with drift : Inf
## ARIMA(2,0,0)(0,1,1)[12] with drift : Inf
## ARIMA(2,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(3,0,0)(0,1,0)[12] with drift : -596.9361
## ARIMA(2,0,1)(0,1,0)[12] with drift : -597.1826
## ARIMA(1,0,1)(0,1,0)[12] with drift : -599.0765
## ARIMA(1,0,1)(1,1,0)[12] with drift : Inf
## ARIMA(1,0,1)(0,1,1)[12] with drift : Inf
## ARIMA(1,0,1)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,1)(0,1,0)[12] with drift : -592.634
## ARIMA(1,0,2)(0,1,0)[12] with drift : -597.2688
## ARIMA(0,0,2)(0,1,0)[12] with drift : -594.8619
## ARIMA(2,0,2)(0,1,0)[12] with drift : -594.5681
## ARIMA(1,0,1)(0,1,0)[12] : -596.868
##
## Best model: ARIMA(1,0,1)(0,1,0)[12] with drift
summary(m_arima) # ARIMA(1,0,1)(0,1,0)[12] with drift # AICc=-599.08 # RMSE on training set = 1867.914
## Series: df_train
## ARIMA(1,0,1)(0,1,0)[12] with drift
## Box Cox transformation: lambda= -0.5117634
##
## Coefficients:
## ar1 ma1 drift
## 0.8416 -0.5170 1e-04
## s.e. 0.2353 0.2191 3e-04
##
## sigma^2 = 1.198e-06: log likelihood = 304
## AIC=-600.01 AICc=-599.08 BIC=-592.52
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 627.0674 1867.914 1436.603 4.084054 8.485267 0.7140816 0.4982809
checkresiduals(m_arima) # The residuals show autocorrelation.
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(0,1,0)[12] with drift
## Q* = 153.34, df = 10, p-value < 2.2e-16
##
## Model df: 2. Total lags used: 12
# Residuals fail the Ljung-Box test badly (p < 2.2e-16); this model is
# excluded from the out-of-sample comparison below.
# Seasonal ARIMA on the untransformed series.
# FIX: use <- for assignment (was `m_arima2 = ...`).
m_arima2 <- auto.arima(df_train, seasonal = TRUE, trace = TRUE)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : 833.9963
## ARIMA(0,0,0)(0,1,0)[12] with drift : 843.8104
## ARIMA(1,0,0)(1,1,0)[12] with drift : 825.1766
## ARIMA(0,0,1)(0,1,1)[12] with drift : 830.562
## ARIMA(0,0,0)(0,1,0)[12] : 885.666
## ARIMA(1,0,0)(0,1,0)[12] with drift : 832.332
## ARIMA(1,0,0)(1,1,1)[12] with drift : 827.2337
## ARIMA(1,0,0)(0,1,1)[12] with drift : 826.1116
## ARIMA(0,0,0)(1,1,0)[12] with drift : 833.7417
## ARIMA(2,0,0)(1,1,0)[12] with drift : 827.0002
## ARIMA(1,0,1)(1,1,0)[12] with drift : 827.1028
## ARIMA(0,0,1)(1,1,0)[12] with drift : 828.7945
## ARIMA(2,0,1)(1,1,0)[12] with drift : 829.1155
## ARIMA(1,0,0)(1,1,0)[12] : 834.7879
##
## Best model: ARIMA(1,0,0)(1,1,0)[12] with drift
summary(m_arima2) # ARIMA(1,0,0)(1,1,0)[12] with drift # AICc=825.18 # RMSE on training set: 1022.652
## Series: df_train
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.4931 -0.5178 150.1088
## s.e. 0.1390 0.1421 19.5956
##
## sigma^2 = 1394424: log likelihood = -408.12
## AIC=824.25 AICc=825.18 BIC=831.73
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.180466 1022.652 695.4615 -0.4203852 3.666462 0.3456878
## ACF1
## Training set -0.0362158
checkresiduals(m_arima2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 11.162, df = 10, p-value = 0.345
##
## Model df: 2. Total lags used: 12
# Residuals pass the Ljung-Box test (p = 0.345).
# Non-seasonal ARIMA with automatic Box-Cox transformation (for comparison).
# FIX: use <- for assignment (was `m_arima3 = ...`).
m_arima3 <- auto.arima(df_train, lambda = 'auto', seasonal = FALSE, trace = TRUE)
##
## ARIMA(2,1,2) with drift : -674.069
## ARIMA(0,1,0) with drift : -650.2057
## ARIMA(1,1,0) with drift : -648.0257
## ARIMA(0,1,1) with drift : -648.0629
## ARIMA(0,1,0) : -651.8114
## ARIMA(1,1,2) with drift : Inf
## ARIMA(2,1,1) with drift : Inf
## ARIMA(3,1,2) with drift : Inf
## ARIMA(2,1,3) with drift : Inf
## ARIMA(1,1,1) with drift : -645.8897
## ARIMA(1,1,3) with drift : Inf
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,3) with drift : Inf
## ARIMA(2,1,2) : Inf
##
## Best model: ARIMA(2,1,2) with drift
summary(m_arima3) # # ARIMA(2,1,2) with drift # AICc=-674.07 # RMSE on training set: 1954.353
## Series: df_train
## ARIMA(2,1,2) with drift
## Box Cox transformation: lambda= -0.5117634
##
## Coefficients:
## ar1 ar2 ma1 ma2 drift
## 0.8847 -0.6381 -1.3223 0.3837 1e-04
## s.e. 0.1443 0.1103 0.1605 0.1525 0e+00
##
## sigma^2 = 5.977e-07: log likelihood = 343.84
## AIC=-675.68 AICc=-674.07 BIC=-663.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 237.8892 1954.353 1523.521 0.4571697 8.305187 0.7572853 0.04131673
checkresiduals(m_arima3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,2) with drift
## Q* = 36.631, df = 8, p-value = 1.346e-05
##
## Model df: 4. Total lags used: 12
# Residuals fail the Ljung-Box test (p = 1.3e-05): ignoring seasonality leaves
# structure in the residuals.
# Non-seasonal ARIMA on the untransformed series (for comparison).
# FIX: use <- for assignment (was `m_arima4 = ...`).
m_arima4 <- auto.arima(df_train, seasonal = FALSE, trace = TRUE)
##
## ARIMA(2,1,2) with drift : Inf
## ARIMA(0,1,0) with drift : 1097.359
## ARIMA(1,1,0) with drift : 1098.72
## ARIMA(0,1,1) with drift : 1097.933
## ARIMA(0,1,0) : 1095.867
## ARIMA(1,1,1) with drift : 1100.037
##
## Best model: ARIMA(0,1,0)
summary(m_arima4) # ARIMA(0,1,0) # AICc=1095.87 # RMSE on training set: 2545.609
## Series: df_train
## ARIMA(0,1,0)
##
## sigma^2 = 6589960: log likelihood = -546.9
## AIC=1095.8 AICc=1095.87 BIC=1097.87
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 264.8371 2545.609 1993.664 0.4077063 10.90008 0.9909757 0.1214998
checkresiduals(m_arima4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 95.138, df = 12, p-value = 4.996e-15
##
## Model df: 0. Total lags used: 12
# Residuals fail the Ljung-Box test (p = 5e-15): a random walk is inadequate
# for this seasonal series.
# TBATS model (trigonometric seasonality, Box-Cox, ARMA errors).
# NOTE(review): the variable name `m_tabats` looks like a typo for `m_tbats`;
# it is kept as-is because the forecasting section below refers to it.
m_tabats <- tbats(df_train)
# NOTE(review): summary() on a tbats object only lists component lengths (see
# below), not a model report; the "AIC: 1094.199" figure is presumably from
# m_tabats$AIC or print(m_tabats) -- verify by re-running.
summary(m_tabats) # AIC: 1094.199
## Length Class Mode
## lambda 1 -none- numeric
## alpha 1 -none- numeric
## beta 1 -none- numeric
## damping.parameter 1 -none- numeric
## gamma.one.values 1 -none- numeric
## gamma.two.values 1 -none- numeric
## ar.coefficients 1 -none- numeric
## ma.coefficients 2 -none- numeric
## likelihood 1 -none- numeric
## optim.return.code 1 -none- numeric
## variance 1 -none- numeric
## AIC 1 -none- numeric
## parameters 2 -none- list
## seed.states 15 -none- numeric
## fitted.values 60 ts numeric
## errors 60 ts numeric
## x 900 -none- numeric
## seasonal.periods 1 -none- numeric
## k.vector 1 -none- numeric
## y 60 ts numeric
## p 1 -none- numeric
## q 1 -none- numeric
## call 2 -none- call
## series 1 -none- character
## method 1 -none- character
checkresiduals(m_tabats)
##
## Ljung-Box test
##
## data: Residuals from TBATS
## Q* = 7.2169, df = 12, p-value = 0.843
##
## Model df: 0. Total lags used: 12
# Residuals pass the Ljung-Box test (p = 0.843).
# Out-of-sample evaluation: forecast 2022 from each candidate model and score
# against df_test. Test-set RMSE summary: ETS 1010.98 (best), TBATS 1593.70,
# seasonal ARIMA 1638.32, Box-Cox ARIMA 2369.96, snaive 2902.84, random walk 4781.56.
fc_snaive <- forecast(m_snaive, h = 12)
accuracy(fc_snaive, df_test) # RMSE on training set: 2404.290, RMSE on test set: 2902.843
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1863.044 2404.290 2011.820 9.385204 10.23531 1.00000 0.4635676
## Test set 2646.428 2902.843 2646.428 11.230674 11.23067 1.31544 0.4961358
## Theil's U
## Training set NA
## Test set 1.014201
autoplot(df) + autolayer(fc_snaive$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the Seasonal Naive Model')
fc_ets <- forecast(m_ets, h = 12)
accuracy(fc_ets, df_test) # RMSE on training set: 847.3451, RMSE on test set: 1010.9793
## ME RMSE MAE MPE MAPE MASE
## Training set 121.4824 847.3451 625.0957 0.3772927 3.405979 0.3107116
## Test set -238.8255 1010.9793 695.7457 -0.8637843 2.728968 0.3458291
## ACF1 Theil's U
## Training set -0.01756663 NA
## Test set 0.08256269 0.3491929
autoplot(df) + autolayer(fc_ets$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the ETS Model')
fc_arima2 <- forecast(m_arima2, h = 12)
accuracy(fc_arima2, df_test) # RMSE on training set: 1022.652 , RMSE on test set: 1638.319
## ME RMSE MAE MPE MAPE MASE
## Training set -1.180466 1022.652 695.4615 -0.4203852 3.666462 0.3456878
## Test set 1279.388526 1638.319 1313.3197 5.1807185 5.296720 0.6528019
## ACF1 Theil's U
## Training set -0.0362158 NA
## Test set 0.4324256 0.6205241
autoplot(df) + autolayer(fc_arima2$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the ARIMA Model')
fc_arima3 <- forecast(m_arima3, h = 12)
accuracy(fc_arima3, df_test) # RMSE on training set: 1954.353 , RMSE on test set: 2369.961
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 237.8892 1954.353 1523.521 0.4571697 8.305187 0.7572853 0.04131673
## Test set 533.9091 2369.961 2049.811 1.3026008 8.188535 1.0188843 0.53807209
## Theil's U
## Training set NA
## Test set 0.904056
autoplot(df) + autolayer(fc_arima3$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the ARIMA Model')
fc_arima4 <- forecast(m_arima4, h = 12)
accuracy(fc_arima4, df_test) # RMSE on training set: 2545.609 , RMSE on test set: 4781.561
## ME RMSE MAE MPE MAPE MASE
## Training set 264.8371 2545.609 1993.664 0.4077063 10.90008 0.9909757
## Test set -3714.2733 4781.561 3996.417 -16.7833001 17.74694 1.9864687
## ACF1 Theil's U
## Training set 0.1214998 NA
## Test set 0.5446097 1.75519
autoplot(df) + autolayer(fc_arima4$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the ARIMA Model')
fc_tabats <- forecast(m_tabats, h = 12)
accuracy(fc_tabats, df_test) # RMSE on training set: 832.3611 , RMSE on test set: 1593.6982
## ME RMSE MAE MPE MAPE MASE
## Training set 148.4739 832.3611 641.1142 0.6449065 3.526768 0.3186738
## Test set -973.9135 1593.6982 1171.4843 -3.7086419 4.488572 0.5823009
## ACF1 Theil's U
## Training set -0.02754765 NA
## Test set 0.50788883 0.5459561
autoplot(df) + autolayer(fc_tabats$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2022 using the tbats Model')
# ETS had the lowest test-set RMSE, so refit it on the full 2017-2022 series
# and forecast 2023.
m_ets <- ets(df) # Retrain the ets model using the whole data set without splitting into train and test sets
fc_ets <- forecast(m_ets, 12)
autoplot(df) + autolayer(fc_ets$mean, series = 'forecasts') + ylab('Transaction Amount') + ggtitle('Forecast for 2023 using the ETS Model')
# Annual totals: forecast 2023 vs actual 2022 and 2021.
# FIX: use <- for assignment (was `fcst2023 = ...` etc.).
fcst2023 <- sum(fc_ets$mean)
fcst2023 # 317068.9 # We expect 317068.9 for online sales transaction amount in 2023.
## [1] 317068.9
actual2022 <- sum(window(df, start = c(2022,1), end = c(2022,12)))
actual2022 # 296608.2 # It was 296608.2 for online sales transaction amount in 2022.
## [1] 296608.2
actual2021 <- sum(window(df, start = c(2021,1), end = c(2021,12)))
actual2021 # 264851 # It was 264851 for online sales transaction amount in 2021. (FIX: comment previously said 2022.)
## [1] 264851
(fcst2023 / actual2022) - 1 # 0.06898235 # The expected growth rate in 2023
## [1] 0.06898235
(actual2022 / actual2021) - 1 # 0.1199057 # The growth rate in 2022
## [1] 0.1199057
# Comparison between the expected and actual transaction amount between January to March in 2023.
# The 2023 actuals live only in df_raw (they were dropped when building ts object df).
# FIX: idiomatic tail() instead of manual length() index arithmetic.
tail(df_raw$trans, 3)
## [1] 21984.78 22173.62 27782.57
fc_ets$mean
## Jan Feb Mar Apr May Jun Jul Aug
## 2023 22814.06 21933.97 27011.57 26505.81 27100.29 25769.12 24567.69 21342.18
## Sep Oct Nov Dec
## 2023 24824.36 29797.53 34037.30 31365.01